Source file ⇒ hope_final.Rmd

# Location of the urban water supplier report CSV.
# The merge conflict is resolved in favour of HEAD; both sides differed only in
# this machine-specific absolute path, so point it at your own copy of the file.
water_residential <- "/Users/Hope/Desktop/stats133/uw_supplier_data040416.csv"
# Read the supplier report and keep only the columns used below. The very long
# calculated R-GPCD column is renamed to `resid_use` as part of the selection.
new_water_residential <- read.file(water_residential) %>%
  select(
    Supplier.Name,
    Stage.Invoked,
    Mandatory.Restrictions,
    Reporting.Month,
    resid_use = CALCULATED.R.GPCD.Reporting.Month..Values.calculated.by.Water.Board.staff.using.methodology.available.at.http...www.waterboards.ca.gov.waterrights.water_issues.programs.drought.docs.ws_tools.guidance_estimate_res_gpcd.pdf.,
    Hydrologic.Region,
    Penalties.Assessed,
    X..Residential.Use
  )
## Reading data with read.csv()
# Turn the reporting month into a date with lubridate's mdy() parser.
new_water_residential$Reporting.Month <- mdy(new_water_residential$Reporting.Month)

Case Study: Bay Area Counties vs Los Angeles County, who is doing their part?

#los angeles water suppliers
#want all of los angeles county so any water supplier that matches the cities within LA county
#some gsub to get this nice list of the cities in LA county

lacities <- "Agoura Hills Alhambra Arcadia Artesia Avalon Azusa Baldwin Park Bell Bell Gardens Bellflower Beverly Hills Bradbury Burbank Calabasas Carson Cerritos Claremont Commerce Compton Covina Cudahy Culver City Diamond Bar Downey Duarte El Monte El Segundo Gardena Glendale Glendora Hawaiian Gardens Hawthorne Hermosa Beach Hidden Hills Huntington Park Industry Inglewood Irwindale La Cañada Flintridge La Habra Heights La Mirada La Puente La Verne Lakewood Lancaster Lawndale Lomita Long Beach Los Angeles Lynwood Malibu Manhattan Beach Maywood Monrovia Montebello Monterey Park Norwalk Palmdale Palos Verdes Estates Paramount Pasadena Pico Rivera Pomona Rancho Palos Verdes Redondo Beach Rolling Hills Rolling Hills Estates Rosemead San Dimas San Fernando San Gabriel San Marino Santa Clarita Santa Fe Springs Santa Monica Sierra Madre Signal Hill South El Monte South Gate South Pasadena Temple City Torrance Vernon Walnut West Covina West Hollywood Westlake Village Whittier"
lacities <- gsub(" " , ")|(" , lacities)

lacities
## [1] "Agoura)|(Hills)|(Alhambra)|(Arcadia)|(Artesia)|(Avalon)|(Azusa)|(Baldwin)|(Park)|(Bell)|(Bell)|(Gardens)|(Bellflower)|(Beverly)|(Hills)|(Bradbury)|(Burbank)|(Calabasas)|(Carson)|(Cerritos)|(Claremont)|(Commerce)|(Compton)|(Covina)|(Cudahy)|(Culver)|(City)|(Diamond)|(Bar)|(Downey)|(Duarte)|(El)|(Monte)|(El)|(Segundo)|(Gardena)|(Glendale)|(Glendora)|(Hawaiian)|(Gardens)|(Hawthorne)|(Hermosa)|(Beach)|(Hidden)|(Hills)|(Huntington)|(Park)|(Industry)|(Inglewood)|(Irwindale)|(La)|(Cañada)|(Flintridge)|(La)|(Habra)|(Heights)|(La)|(Mirada)|(La)|(Puente)|(La)|(Verne)|(Lakewood)|(Lancaster)|(Lawndale)|(Lomita)|(Long)|(Beach)|(Los)|(Angeles)|(Lynwood)|(Malibu)|(Manhattan)|(Beach)|(Maywood)|(Monrovia)|(Montebello)|(Monterey)|(Park)|(Norwalk)|(Palmdale)|(Palos)|(Verdes)|(Estates)|(Paramount)|(Pasadena)|(Pico)|(Rivera)|(Pomona)|(Rancho)|(Palos)|(Verdes)|(Redondo)|(Beach)|(Rolling)|(Hills)|(Rolling)|(Hills)|(Estates)|(Rosemead)|(San)|(Dimas)|(San)|(Fernando)|(San)|(Gabriel)|(San)|(Marino)|(Santa)|(Clarita)|(Santa)|(Fe)|(Springs)|(Santa)|(Monica)|(Sierra)|(Madre)|(Signal)|(Hill)|(South)|(El)|(Monte)|(South)|(Gate)|(South)|(Pasadena)|(Temple)|(City)|(Torrance)|(Vernon)|(Walnut)|(West)|(Covina)|(West)|(Hollywood)|(Westlake)|(Village)|(Whittier"
#NOT SHOWN:then manually put a . in between cities with more than one word in the name to account for spaces
#below is the dataset that only has los angeles county levels of water usage

# One regex fragment per Los Angeles County city; "." stands in for the space
# inside multi-word names.
# Fix: the hand-edited pattern still contained the bare fragments "Gardens",
# "Rancho" and "Village" (left over from splitting the city list on spaces).
# Those match any supplier whose name merely contains that word, wherever it is
# in the state. "Hawaiian" + "Gardens" is now "Hawaiian.Gardens"; "Rancho" and
# "Village" are dropped because "Palos.Verdes" and "Westlake" already cover
# Rancho Palos Verdes and Westlake Village.
la_city_patterns <- c(
  "Agoura.Hills", "Alhambra", "Arcadia", "Artesia", "Avalon", "Azusa",
  "Baldwin.Park", "Bell", "Bell.Gardens", "Bellflower", "Beverly.Hills",
  "Bradbury", "Burbank", "Calabasas", "Carson", "Cerritos", "Claremont",
  "Commerce", "Compton", "Covina", "Cudahy", "Culver.City", "Diamond.Bar",
  "Downey", "Duarte", "El.Monte", "El.Segundo", "Gardena", "Glendale",
  "Glendora", "Hawaiian.Gardens", "Hawthorne", "Hermosa.Beach",
  "Hidden.Hills", "Huntington.Park", "Industry", "Inglewood", "Irwindale",
  "La.Cañada.Flintridge", "La.Habra.Heights", "La.Mirada", "La.Puente",
  "La.Verne", "Lakewood", "Lancaster", "Lawndale", "Lomita", "Long.Beach",
  "Los.Angeles", "Lynwood", "Malibu", "Manhattan.Beach", "Maywood",
  "Monrovia", "Montebello", "Monterey.Park", "Norwalk", "Palmdale",
  "Palos.Verdes.Estates", "Paramount", "Pasadena", "Pico.Rivera", "Pomona",
  "Palos.Verdes", "Redondo.Beach", "Rolling.Hills", "Rolling.Hills.Estates",
  "Rosemead", "San.Dimas", "San.Fernando", "San.Gabriel", "San.Marino",
  "Santa.Clarita", "Santa.Fe.Springs", "Santa.Monica", "Sierra.Madre",
  "Signal.Hill", "South.El.Monte", "South.Gate", "South.Pasadena",
  "Temple.City", "Torrance", "Vernon", "Walnut", "West.Covina",
  "West.Hollywood", "Westlake", "Whittier"
)

# Collapse to "(a)|(b)|..." so a supplier matches if its name contains any city.
la_supplier_regex <- paste0("(", la_city_patterns, ")", collapse = "|")

# Mean residential use per reporting month across the matching suppliers.
losangeles.avg <- new_water_residential %>%
  filter(grepl(la_supplier_regex, Supplier.Name)) %>%
  group_by(Reporting.Month) %>%
  summarise(la_use_avg = mean(resid_use))
#nine counties surround the bay: SF, Marin, Sonoma, Napa, Solano, Contra Costa, Alameda, Santa Clara, and San Mateo (the pattern below also matches "East Bay")

sfbay <- "Alameda, California
+ Albany, California
+ American Canyon, California
+ Antioch, California
+ Atherton, California
+ B
+ Belmont, California
+ Belvedere, California
+ Benicia, California
+ Berkeley, California
+ Brentwood, California
+ Brisbane, California
+ Burlingame, California
+ C
+ Calistoga, California
+ Campbell, California
+ Clayton, California
+ Cloverdale, California
+ Colma, California
+ Concord, California
+ Corte Madera, California
+ Cotati, California
+ Cupertino, California
+ D
+ Daly City, California
+ Danville, California
+ Dixon, California
+ Dublin, California
+ E
+ East Palo Alto, California
+ El Cerrito, California
+ Emeryville, California
+ F
+ Fairfax, California
+ Foster City, California
+ Fremont, California
+ G
+ Gilroy, California
+ H
+ Half Moon Bay, California
+ Hayward, California
+ Healdsburg, California
+ Hercules, California
+ Hillsborough, California
+ L
+ Lafayette, California
+ Larkspur, California
+ Livermore, California
+ Los Altos, California
+ Los Altos Hills, California
+ Los Gatos, California
+ M
+ Martinez, California
+ Menlo Park, California
+ Mill Valley, California
+ Millbrae, California
+ Milpitas, California
+ Monte Sereno, California
+ Moraga, California
+ Morgan Hill, California
+ Mountain View, California
+ N
+ Napa, California
+ Newark, California
+ Novato, California
+ O
+ Oakland, California
+ Oakley, California
+ Orinda, California
+ P
+ Pacifica, California
+ Palo Alto, California
+ Petaluma, California
+ Piedmont, California
+ Pinole, California
+ Pittsburg, California
+ Pleasant Hill, California
+ Pleasanton, California
+ Portola Valley, California
+ R
+ Redwood City, California
+ Richmond, California
+ Rio Vista, California
+ Rohnert Park, California
+ Ross, California
+ S
+ St. Helena, California
+ San Anselmo, California
+ San Carlos, California
+ San Francisco
+ San Jose, California
+ San Leandro, California
+ San Mateo, California
+ San Pablo, California
+ San Rafael, California
+ San Ramon, California
+ Santa Clara, California
+ Santa Rosa, California
+ Saratoga, California
+ Sausalito, California
+ Sebastopol, California
+ Sonoma, California
+ South San Francisco, California
+ Suisun City, California
+ Sunnyvale, California
+ T
+ Tiburon, California
+ U
+ Union City, California
+ V
+ Vacaville, California
+ Vallejo, California
+ W
+ Walnut Creek, California
+ Windsor, California
+ Woodside, California
+ Y
+ Yountville, California"

# Turn the pasted city list into a rough "a)|(b)|(c" string that can be
# copy/pasted and hand-edited into the grepl() pattern used further down.
sfbay <- gsub("California", "", sfbay) #drop the word "California" from every entry
# NOTE(review): the printed result below still contains the single-letter
# section headers (e.g. "+ B+ Belmont"), so this pattern evidently never
# matches; every line of the pasted text starts with "+ ", which the pattern
# does not allow for. The leftovers are removed by hand afterwards.
sfbay <- gsub("\n[A-Z]\n", "", sfbay) #intended to remove the one-letter alphabetical section headers
sfbay <- gsub("\n", "", sfbay) #remove the remaining newlines
sfbay <- gsub(", " , ")|(", sfbay) #the ", " left after each city becomes the ")|(" separator used in grepl later

sfbay  #print it so it can be copy/pasted and tidied by hand for grepl
## [1] "Alameda)|(+ Albany)|(+ American Canyon)|(+ Antioch)|(+ Atherton)|(+ B+ Belmont)|(+ Belvedere)|(+ Benicia)|(+ Berkeley)|(+ Brentwood)|(+ Brisbane)|(+ Burlingame)|(+ C+ Calistoga)|(+ Campbell)|(+ Clayton)|(+ Cloverdale)|(+ Colma)|(+ Concord)|(+ Corte Madera)|(+ Cotati)|(+ Cupertino)|(+ D+ Daly City)|(+ Danville)|(+ Dixon)|(+ Dublin)|(+ E+ East Palo Alto)|(+ El Cerrito)|(+ Emeryville)|(+ F+ Fairfax)|(+ Foster City)|(+ Fremont)|(+ G+ Gilroy)|(+ H+ Half Moon Bay)|(+ Hayward)|(+ Healdsburg)|(+ Hercules)|(+ Hillsborough)|(+ L+ Lafayette)|(+ Larkspur)|(+ Livermore)|(+ Los Altos)|(+ Los Altos Hills)|(+ Los Gatos)|(+ M+ Martinez)|(+ Menlo Park)|(+ Mill Valley)|(+ Millbrae)|(+ Milpitas)|(+ Monte Sereno)|(+ Moraga)|(+ Morgan Hill)|(+ Mountain View)|(+ N+ Napa)|(+ Newark)|(+ Novato)|(+ O+ Oakland)|(+ Oakley)|(+ Orinda)|(+ P+ Pacifica)|(+ Palo Alto)|(+ Petaluma)|(+ Piedmont)|(+ Pinole)|(+ Pittsburg)|(+ Pleasant Hill)|(+ Pleasanton)|(+ Portola Valley)|(+ R+ Redwood City)|(+ Richmond)|(+ Rio Vista)|(+ Rohnert Park)|(+ Ross)|(+ S+ St. Helena)|(+ San Anselmo)|(+ San Carlos)|(+ San Francisco+ San Jose)|(+ San Leandro)|(+ San Mateo)|(+ San Pablo)|(+ San Rafael)|(+ San Ramon)|(+ Santa Clara)|(+ Santa Rosa)|(+ Saratoga)|(+ Sausalito)|(+ Sebastopol)|(+ Sonoma)|(+ South San Francisco)|(+ Suisun City)|(+ Sunnyvale)|(+ T+ Tiburon)|(+ U+ Union City)|(+ V+ Vacaville)|(+ Vallejo)|(+ W+ Walnut Creek)|(+ Windsor)|(+ Woodside)|(+ Y+ Yountville)|("
# One regex fragment per Bay Area city (plus "East.Bay"); "." stands in for the
# space inside multi-word names.
# Fix: the original pattern contained "(San.FranciscoSan.Jose)", two cities
# fused into a single alternative that matches neither "San Francisco" nor
# "San Jose". They are separate entries here.
bay_city_patterns <- c(
  "Alameda", "Albany", "American.Canyon", "Antioch", "Atherton", "Belmont",
  "Belvedere", "Benicia", "Berkeley", "Brentwood", "Brisbane", "Burlingame",
  "Calistoga", "Campbell", "Clayton", "Cloverdale", "Colma", "Concord",
  "Corte.Madera", "Cotati", "Cupertino", "Daly.City", "Danville", "Dixon",
  "Dublin", "East.Palo.Alto", "El.Cerrito", "Emeryville", "Fairfax",
  "Foster.City", "Fremont", "Gilroy", "Half.Moon.Bay", "Hayward",
  "Healdsburg", "Hercules", "Hillsborough", "Lafayette", "Larkspur",
  "Livermore", "Los.Altos", "Los.Altos.Hills", "Los.Gatos", "Martinez",
  "Menlo.Park", "Mill.Valley", "Millbrae", "Milpitas", "Monte.Sereno",
  "Moraga", "Morgan.Hill", "Mountain.View", "Napa", "Newark", "Novato",
  "Oakland", "Oakley", "Orinda", "Pacifica", "Palo.Alto", "Petaluma",
  "Piedmont", "Pinole", "Pittsburg", "Pleasant.Hill", "Pleasanton",
  "Portola.Valley", "Redwood.City", "Richmond", "Rio.Vista", "Rohnert.Park",
  "Ross", "St..Helena", "San.Anselmo", "San.Carlos", "San.Francisco",
  "San.Jose", "San.Leandro", "San.Mateo", "San.Pablo", "San.Rafael",
  "San.Ramon", "Santa.Clara", "Santa.Rosa", "Saratoga", "Sausalito",
  "Sebastopol", "Sonoma", "South.San.Francisco", "Suisun.City", "Sunnyvale",
  "Tiburon", "Union.City", "Vacaville", "Vallejo", "Walnut.Creek", "Windsor",
  "Woodside", "Yountville", "East.Bay"
)

# Collapse to "(a)|(b)|..." so a supplier matches if its name contains any city.
bay_supplier_regex <- paste0("(", bay_city_patterns, ")", collapse = "|")

# Mean residential use per reporting month across the matching suppliers.
bayavg <- new_water_residential %>%
  filter(grepl(bay_supplier_regex, Supplier.Name)) %>%
  group_by(Reporting.Month) %>%
  summarise(bay_use_avg = mean(resid_use))
# we want to compare to california as a whole so let's take the average for every month for the whole state!

# Statewide baseline: mean resid_use per reporting month over every supplier
# in the data set.
state.avg <- summarise(
  group_by(new_water_residential, Reporting.Month),
  state_use_avg = mean(resid_use)
)

Now let’s combine all three of these datasets so that we have just one dataset we’re plotting from

# Put the statewide and Los Angeles monthly averages side by side, keeping only
# months present in both tables.
la_state <- inner_join(state.avg, losangeles.avg, by = "Reporting.Month")

# Add the Bay Area averages the same way, giving one row per month with all
# three averages.
alljoined <- inner_join(la_state, bayavg, by = "Reporting.Month")

#now that I have everything joined, I have to make it tidy
#using gather to make it narrow

# Stack the three average columns into key/value form: `boundary` holds the
# source column name and `avg_resid_use` holds its value.
narrowall <- gather(
  alljoined,
  key = boundary, value = avg_resid_use,
  state_use_avg, la_use_avg, bay_use_avg
)

# Swap the column names stored in `boundary` for the labels shown in the legend.
region_labels <- c(
  state_use_avg = "California",
  la_use_avg = "Los Angeles County",
  bay_use_avg = "All 9 Bay Area Counties"
)
for (column_name in names(region_labels)) {
  narrowall$boundary <- gsub(column_name, region_labels[[column_name]], narrowall$boundary)
}

Time to plot!

  # Points plus a loess trend line of average residential use per month, one
  # colour per region. The merge conflict is resolved in favour of the HEAD
  # styling.
  ggplot(narrowall, aes(x = Reporting.Month, y = avg_resid_use, color = boundary)) +
  geom_point() +
  stat_smooth(se = FALSE, method = "loess") +
  labs(x = "Month", y = "Average Residential Water Usage", title = "Average Residential Water Usage vs Month") +
  theme(
    legend.key = element_rect(colour = "black"),
    plot.background = element_rect(colour = "grey"),
    # panel.background used to be passed twice (fill, then border). Current
    # ggplot2 declares it as a named argument of theme(), so a repeated name is
    # an error; both settings now live in one element_rect().
    panel.background = element_rect(fill = "grey", colour = "black"),
    panel.grid.minor = element_line(linetype = "dotted"),
    axis.title = element_text(size = rel(1.5)),
    axis.text = element_text(size = rel(1.0)),
    legend.text = element_text(size = rel(1.0)),
    plot.title = element_text(size = rel(2))
  ) +
  scale_colour_manual("Region", values = c("green", "blue", "yellow"))

Analysis of Case Study

Initial analysis: The most important thing to note is that average usage in the Bay Area counties is consistently lower than in LA County. The statewide, LA, and Bay Area averages all follow similar seasonal changes in usage.

drought severity in California over time

#install.packages("mapdata")
#install.packages("ggmap")
library(mapdata)
## Loading required package: maps
## 
##  # maps v3.1: updated 'world': all lakes moved to separate new #
##  # 'lakes' database. Type '?world' or 'news(package="maps")'.  #
library(ggmap)

#data set that has drought by county

# Location of the county-level drought severity CSV.
# Merge conflicts in this section are resolved in favour of HEAD, because the
# code further down uses `CAcountiesvalues`, which only HEAD defines. The other
# side's `drought_coord` join is kept as well since it does not clash.
# The path is machine-specific; point it at your own copy of the file.
drought_severity <- "/Users/Hope/Desktop/stats133/countydroughtseverity.csv"
drought_severity <- drought_severity %>%
  read.file()
## Reading data with read.csv()

# Reduce "Alameda County" to "alameda" and call the column `subregion`, the key
# used to join against the county map data below. tolower() is already
# vectorised, so no sapply() wrapper is needed.
drought_severity$county <- gsub(" County", "", drought_severity$county)
drought_severity$county <- tolower(drought_severity$county)

names(drought_severity)[names(drought_severity) == "county"] <- "subregion"
drought_severity <- data.frame(drought_severity)

#adding month and year column for later
drought_severity$releaseDate <- as.Date(drought_severity$releaseDate)
drought_severity$month <- months(drought_severity$releaseDate)
drought_severity$year <- year(ymd(drought_severity$releaseDate))

#narrow version of drought_severity
#taking the average for each month for each category of severity for simplicity
tidy_severity <- drought_severity %>%
  gather(key = category, value = value, NONE, D0, D1, D2, D3, D4) %>%
  select(subregion, month, year, category, value, FIPS) %>%
  group_by(month, year, subregion, category) %>%
  summarise(ave_value = mean(value))

#just making sure everything is in the correct format for ease of plotting
tidy_severity <- data.frame(tidy_severity)
tidy_severity$year <- as.numeric(tidy_severity$year)

#coordinates for the california counties
CAcounties <- map_data('county') %>%
  filter(region == "california")
CAcounties <- data.frame(CAcounties)

#merge drought information with coordinates for each county
CAcountiesvalues <- CAcounties %>%
  right_join(tidy_severity, by = "subregion")

#joining the wide drought table with the coordinates so it's all in one
#(work in progress from the other branch: still working out which variables
#geom_map needs from the data set and from the map data)
drought_coord <- drought_severity %>%
  inner_join(CAcounties, by = "subregion")


#creating levels for mapping later
# Fix the facet order of the drought categories: D4 first, NONE last.
CAcountiesvalues$category <- factor(
  CAcountiesvalues$category,
  levels = c("D4", "D3", "D2", "D1", "D0", "NONE")
)

# Calendar order for the month names (month.name is base R's built-in
# "January" ... "December" vector).
CAcountiesvalues$month <- factor(CAcountiesvalues$month, levels = month.name)
#okay so this makes the really large grid with many maps, may just use this as an overview, then take pieces from it?

<<<<<<< HEAD
# Grid of county maps: one row per drought category and one column per year,
# with each county shaded from yellow to red by its ave_value.
ggplot(CAcountiesvalues) +
  geom_polygon(
    aes(x = long, y = lat, group = group, fill = ave_value),
    colour = "white", size = 0.02
  ) +
  scale_fill_gradient(low = "yellow", high = "red") +
  facet_grid(category ~ year) +
  labs(x = "Year", y = "Drought Severity", title = "Drought Severity vs Year") +
  theme(
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  )

Comments

Well this one doesn’t necessarily need to be used since we have better maps below, but if you did want to include this I would mention how in the two previous droughts in California, no counties reached “Exceptional Drought” levels, in contrast to 2014-2016 when multiple counties are registering as having “Exceptional Drought” levels. This in conjunction with Tiffany’s plot of # of counties vs drought level would be interesting side by side, because in these maps you can see that for 2014 and 2015 there were no counties that had “NONE” or no level of drought, and in 2016 we see some counties highlighted again. So Tiffany’s plot should tell us what number of counties there are that have “NONE” while this one shows where.


selected data

# Re-read the drought severity CSV, keeping the release date, the county
# identifiers and the six severity columns.
timelapse <- read.file("/Users/Hope/Desktop/stats133/countydroughtseverity.csv") %>%
  select(releaseDate, county, FIPS, NONE, D0, D1, D2, D3, D4)
## Reading data with read.csv()
head(timelapse)
##   releaseDate         county FIPS NONE  D0  D1     D2    D3    D4
## 1  2016-04-12 Alameda County 6001    0 100 100  82.42 44.83 16.37
## 2  2016-04-05 Alameda County 6001    0 100 100  82.42 44.83 16.37
## 3  2016-03-29 Alameda County 6001    0 100 100  82.42 44.83 20.82
## 4  2016-03-22 Alameda County 6001    0 100 100  82.42 44.83 20.82
## 5  2016-03-15 Alameda County 6001    0 100 100  82.42 44.83 20.82
## 6  2016-03-08 Alameda County 6001    0 100 100 100.00 98.46 20.82

data manipulation

# Subtract each level's more severe neighbour so D0..D3 hold only the share in
# that level (D4 is left as is), then derive the month name and year of each
# release. The subtractions run in this order so that each one still sees the
# untouched value of the next column.
bins <- timelapse %>%
  mutate(
    D0 = D0 - D1,
    D1 = D1 - D2,
    D2 = D2 - D3,
    D3 = D3 - D4,
    releaseDate = as.Date(releaseDate),
    month = months(releaseDate),
    year = year(ymd(releaseDate))
  )

head(bins)
##   releaseDate         county FIPS NONE D0    D1    D2    D3    D4 month
## 1  2016-04-12 Alameda County 6001    0  0 17.58 37.59 28.46 16.37 April
## 2  2016-04-05 Alameda County 6001    0  0 17.58 37.59 28.46 16.37 April
## 3  2016-03-29 Alameda County 6001    0  0 17.58 37.59 24.01 20.82 March
## 4  2016-03-22 Alameda County 6001    0  0 17.58 37.59 24.01 20.82 March
## 5  2016-03-15 Alameda County 6001    0  0 17.58 37.59 24.01 20.82 March
## 6  2016-03-08 Alameda County 6001    0  0  0.00  1.54 77.64 20.82 March
##   year
## 1 2016
## 2 2016
## 3 2016
## 4 2016
## 5 2016
## 6 2016

weighted average

# Score each release as a weighted average of the severity shares (D0 weighs 1
# up to D4 weighing 5, NONE weighs 0), shifted down by one so that "no drought"
# is -1 and D4 is 4. Then average the releases of each county and month and
# round to the nearest whole level.
bins <- bins %>%
  mutate(FIPS_avg = (1 * D0 + 2 * D1 + 3 * D2 + 4 * D3 + 5 * D4) / 100 - 1) %>%
  group_by(month, year, FIPS, county) %>%
  summarise(monthly_avg = round(mean(FIPS_avg))) %>%
  # Drop the leftover grouping before editing columns directly.
  ungroup()

# "Alameda County" -> "alameda". tolower() is vectorised, so the sapply()
# wrapper (which also attached names to the result) is unnecessary.
bins$county <- tolower(gsub(" County", "", bins$county))

# `subregion` is the key used to join against the county map data.
names(bins)[names(bins) == "county"] <- "subregion"
bins <- data.frame(bins)

head(bins)
##   month year FIPS subregion monthly_avg
## 1 April 2000 6001   alameda          -1
## 2 April 2000 6003    alpine          -1
## 3 April 2000 6005    amador          -1
## 4 April 2000 6007     butte          -1
## 5 April 2000 6009 calaveras          -1
## 6 April 2000 6011    colusa          -1
# Translate numeric drought codes into display labels.
#
# x: numeric vector of codes; -1 means no drought and 0..3 mean D0..D3.
# Returns a character vector the same length as `x`. Any other non-missing
# value (4 in particular) is labelled "Exceptional Drought (D4)", as before.
#
# The original if/else chain only accepted a single value and stopped with an
# error on NA. This version handles whole vectors, returns NA for NA input, and
# gives the same labels when called one element at a time through sapply().
drought_levels <- function(x) {
  known_codes <- -1:3
  known_labels <- c(
    "No Drought (NONE)",
    "Abnormally Dry (D0)",
    "Moderate Drought (D1)",
    "Severe Drought (D2)",
    "Extreme Drought (D3)"
  )

  position <- match(x, known_codes)
  labels <- known_labels[position]

  # Unmatched but non-missing codes fall through to the most severe label.
  labels[is.na(position) & !is.na(x)] <- "Exceptional Drought (D4)"
  labels
}

bins <- bins %>% mutate(monthly_avg=sapply(monthly_avg,drought_levels))
head(bins)
##   month year FIPS subregion       monthly_avg
## 1 April 2000 6001   alameda No Drought (NONE)
## 2 April 2000 6003    alpine No Drought (NONE)
## 3 April 2000 6005    amador No Drought (NONE)
## 4 April 2000 6007     butte No Drought (NONE)
## 5 April 2000 6009 calaveras No Drought (NONE)
## 6 April 2000 6011    colusa No Drought (NONE)
binscounties <- bins%>%
  right_join(CAcounties, by = c("subregion"))

plot manipulation

# Calendar order for month names (month.name is base R's built-in
# "January" ... "December" vector).
binscounties$month <- factor(binscounties$month, levels = month.name)

# Legend order for the drought labels, mildest first.
severity_labels <- c(
  "No Drought (NONE)",
  "Abnormally Dry (D0)",
  "Moderate Drought (D1)",
  "Severe Drought (D2)",
  "Extreme Drought (D3)",
  "Exceptional Drought (D4)"
)
binscounties$monthly_avg <- factor(binscounties$monthly_avg, levels = severity_labels)

# Sort by month and build a "Month Year" label used to facet the maps.
binscounties <- mutate(
  arrange(group_by(binscounties, year), month),
  completedate = paste(month, year)
)

head(binscounties) 
## Source: local data frame [6 x 11]
## Groups: year [1]
## 
##     month  year  FIPS subregion       monthly_avg      long      lat group
##    (fctr) (dbl) (int)     (chr)            (fctr)     (dbl)    (dbl) (dbl)
## 1 January  2000  6001   alameda No Drought (NONE) -121.4785 37.48290   157
## 2 January  2000  6001   alameda No Drought (NONE) -121.5129 37.48290   157
## 3 January  2000  6001   alameda No Drought (NONE) -121.8853 37.48290   157
## 4 January  2000  6001   alameda No Drought (NONE) -121.8968 37.46571   157
## 5 January  2000  6001   alameda No Drought (NONE) -121.9254 37.45998   157
## 6 January  2000  6001   alameda No Drought (NONE) -121.9483 37.47717   157
## Variables not shown: order (int), region (chr), completedate (chr)

create levels for all months/years

# Twelve month names repeated once for each of the 17 years 2000-2016.
months <- rep(month.name, times = 17)

# Each year repeated twelve times so it lines up with `months`.
years <- rep(2000:2016, each = 12)

# "Month Year" labels in chronological order, minus the last eight, so the
# sequence stops at "April 2016".
monthsyears <- head(paste(months, years), -8)

monthsyears
##   [1] "January 2000"   "February 2000"  "March 2000"     "April 2000"    
##   [5] "May 2000"       "June 2000"      "July 2000"      "August 2000"   
##   [9] "September 2000" "October 2000"   "November 2000"  "December 2000" 
##  [13] "January 2001"   "February 2001"  "March 2001"     "April 2001"    
##  [17] "May 2001"       "June 2001"      "July 2001"      "August 2001"   
##  [21] "September 2001" "October 2001"   "November 2001"  "December 2001" 
##  [25] "January 2002"   "February 2002"  "March 2002"     "April 2002"    
##  [29] "May 2002"       "June 2002"      "July 2002"      "August 2002"   
##  [33] "September 2002" "October 2002"   "November 2002"  "December 2002" 
##  [37] "January 2003"   "February 2003"  "March 2003"     "April 2003"    
##  [41] "May 2003"       "June 2003"      "July 2003"      "August 2003"   
##  [45] "September 2003" "October 2003"   "November 2003"  "December 2003" 
##  [49] "January 2004"   "February 2004"  "March 2004"     "April 2004"    
##  [53] "May 2004"       "June 2004"      "July 2004"      "August 2004"   
##  [57] "September 2004" "October 2004"   "November 2004"  "December 2004" 
##  [61] "January 2005"   "February 2005"  "March 2005"     "April 2005"    
##  [65] "May 2005"       "June 2005"      "July 2005"      "August 2005"   
##  [69] "September 2005" "October 2005"   "November 2005"  "December 2005" 
##  [73] "January 2006"   "February 2006"  "March 2006"     "April 2006"    
##  [77] "May 2006"       "June 2006"      "July 2006"      "August 2006"   
##  [81] "September 2006" "October 2006"   "November 2006"  "December 2006" 
##  [85] "January 2007"   "February 2007"  "March 2007"     "April 2007"    
##  [89] "May 2007"       "June 2007"      "July 2007"      "August 2007"   
##  [93] "September 2007" "October 2007"   "November 2007"  "December 2007" 
##  [97] "January 2008"   "February 2008"  "March 2008"     "April 2008"    
## [101] "May 2008"       "June 2008"      "July 2008"      "August 2008"   
## [105] "September 2008" "October 2008"   "November 2008"  "December 2008" 
## [109] "January 2009"   "February 2009"  "March 2009"     "April 2009"    
## [113] "May 2009"       "June 2009"      "July 2009"      "August 2009"   
## [117] "September 2009" "October 2009"   "November 2009"  "December 2009" 
## [121] "January 2010"   "February 2010"  "March 2010"     "April 2010"    
## [125] "May 2010"       "June 2010"      "July 2010"      "August 2010"   
## [129] "September 2010" "October 2010"   "November 2010"  "December 2010" 
## [133] "January 2011"   "February 2011"  "March 2011"     "April 2011"    
## [137] "May 2011"       "June 2011"      "July 2011"      "August 2011"   
## [141] "September 2011" "October 2011"   "November 2011"  "December 2011" 
## [145] "January 2012"   "February 2012"  "March 2012"     "April 2012"    
## [149] "May 2012"       "June 2012"      "July 2012"      "August 2012"   
## [153] "September 2012" "October 2012"   "November 2012"  "December 2012" 
## [157] "January 2013"   "February 2013"  "March 2013"     "April 2013"    
## [161] "May 2013"       "June 2013"      "July 2013"      "August 2013"   
## [165] "September 2013" "October 2013"   "November 2013"  "December 2013" 
## [169] "January 2014"   "February 2014"  "March 2014"     "April 2014"    
## [173] "May 2014"       "June 2014"      "July 2014"      "August 2014"   
## [177] "September 2014" "October 2014"   "November 2014"  "December 2014" 
## [181] "January 2015"   "February 2015"  "March 2015"     "April 2015"    
## [185] "May 2015"       "June 2015"      "July 2015"      "August 2015"   
## [189] "September 2015" "October 2015"   "November 2015"  "December 2015" 
## [193] "January 2016"   "February 2016"  "March 2016"     "April 2016"
# Order the "Month Year" labels chronologically so the facets come out in time
# order rather than alphabetically.
binscounties$completedate <- factor(binscounties$completedate, levels = monthsyears)

# Back to a plain data frame for plotting.
binscounties <- data.frame(binscounties)

another map of each month from January 2000 to April 2016

# Overview: one small county map per "Month Year" label, twelve per row,
# coloured by that month's drought label.
binscounties %>%
  ggplot() +
  geom_polygon(
    aes(x = long, y = lat, group = group, fill = monthly_avg),
    colour = "white", size = 0.02
  ) +
  facet_wrap(~completedate, ncol = 12, nrow = 17, drop = TRUE) +
  labs(x = "Month", y = "Year", title = "Drought Severity in California from 2000 to Present") +
  theme(
    axis.line = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid.major = element_blank(),
    # panel.grid.minor and panel.background were each passed twice. Current
    # ggplot2 declares them as named arguments of theme(), so a repeated name is
    # an error. The grid stays blank (maps need no grid lines) and the two
    # background settings are merged into one element_rect().
    panel.grid.minor = element_blank(),
    legend.key = element_rect(colour = "black"),
    plot.background = element_rect(colour = "grey"),
    panel.background = element_rect(fill = "grey", colour = "black"),
    axis.title = element_text(size = rel(1.5)),
    legend.text = element_text(size = rel(1.0)),
    plot.title = element_text(size = rel(2))
  ) +
  scale_fill_manual("Drought Severity", values = c("#7baad8", "#fffc67", "orange", "darkorange3", "orangered3", "firebrick4"))

map from 2006-2010 to capture the previous drought in California

# Monthly county maps restricted to 2006-2010, twelve per row, coloured by that
# month's drought label.
binscounties %>%
  filter(year >= 2006, year <= 2010) %>%
  ggplot() +
  geom_polygon(
    aes(x = long, y = lat, group = group, fill = monthly_avg),
    colour = "white", size = 0.02
  ) +
  facet_wrap(~completedate, ncol = 12, nrow = 5, drop = TRUE) +
  labs(x = "Month", y = "Year", title = "Previous Drought in California from 2006 to 2010") +
  theme(
    axis.line = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.key = element_rect(colour = "black"),
    plot.background = element_rect(colour = "grey"),
    # panel.background was passed twice (fill, then border). Current ggplot2
    # declares it as a named argument of theme(), so a repeated name is an
    # error; both settings now live in one element_rect().
    panel.background = element_rect(fill = "grey", colour = "black"),
    axis.title = element_text(size = rel(1.5)),
    legend.text = element_text(size = rel(1.0)),
    plot.title = element_text(size = rel(2))
  ) +
  scale_fill_manual("Drought Severity", values = c("#7baad8", "#fffc67", "orange", "darkorange3", "orangered3", "firebrick4"))

map from 2011 to Present to show current drought:

# Monthly county maps from 2011 onwards, twelve per row, coloured by that
# month's drought label.
binscounties %>%
  filter(year >= 2011) %>%
  ggplot() +
  geom_polygon(
    aes(x = long, y = lat, group = group, fill = monthly_avg),
    colour = "white", size = 0.02
  ) +
  facet_wrap(~completedate, ncol = 12, drop = TRUE) +
  labs(x = "Month", y = "Year", title = "Drought Severity in California from 2011 to Present") +
  theme(
    axis.line = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid.major = element_blank(),
    # panel.grid.minor and panel.background were each passed twice. Current
    # ggplot2 declares them as named arguments of theme(), so a repeated name is
    # an error. The grid stays blank and the two background settings are merged.
    # A second theme() call that only repeated the blank axis settings above has
    # been removed.
    panel.grid.minor = element_blank(),
    legend.key = element_rect(colour = "black"),
    plot.background = element_rect(colour = "grey"),
    panel.background = element_rect(fill = "grey", colour = "black"),
    axis.title = element_text(size = rel(1.5)),
    legend.text = element_text(size = rel(1.0)),
    plot.title = element_text(size = rel(2))
  ) +
  scale_fill_manual("Drought Severity", values = c("#7baad8", "#fffc67", "orange", "darkorange3", "orangered3", "firebrick4"))

Overview analysis/comments

The first “overview” maps may be too much to put into the presentation, because the image needs to be very large to show anything useful. I definitely think the contrast is good to see, but the “Previous Drought” and “Current Drought” maps may be enough on their own to show how much worse off we are currently. Again, I think these will work well alongside the plot that Tiff made.

In depth analysis:

======= #trying to plot here, testing different data and map info needed #drought_severity%>% # ggplot() + # geom_map(aes(x = long, y = lat, group = group, map_id = 'subregion', fill = avepercent), map = CAcounties) + # facet_wrap(~year) >>>>>>> fa68aabbc8d96c047df6dfc93d2083f1c3f79e9e